import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
# Report that the imports succeeded and which library versions are in use.
print("✅ All libraries loaded.")
for label, module in (
    ("NumPy", np),
    ("Pandas", pd),
    ("Seaborn", sns),
    ("SciPy", scipy),
):
    print(f"{label}:", module.__version__)
✅ All libraries loaded. NumPy: 1.26.4 Pandas: 2.3.0+4.g1dfc98e16a Seaborn: 0.11.2 SciPy: 1.13.1
# Load the delivery dataset (path is relative to the working directory)
# and preview the first rows.
df = pd.read_csv('Food_Delivery_Times.csv')
df.head()
| Order_ID | Distance_km | Weather | Traffic_Level | Time_of_Day | Vehicle_Type | Preparation_Time_min | Courier_Experience_yrs | Delivery_Time_min | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 522 | 7.93 | Windy | Low | Afternoon | Scooter | 12 | 1.0 | 43 |
| 1 | 738 | 16.42 | Clear | Medium | Evening | Bike | 20 | 2.0 | 84 |
| 2 | 741 | 9.52 | Foggy | Low | Night | Scooter | 28 | 1.0 | 59 |
| 3 | 661 | 7.44 | Rainy | Medium | Afternoon | Scooter | 5 | 1.0 | 37 |
| 4 | 412 | 19.03 | Clear | Low | Morning | Bike | 16 | 5.0 | 68 |
# viewing dataset info: per-column dtypes and non-null counts
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1000 entries, 0 to 999 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Order_ID 1000 non-null int64 1 Distance_km 1000 non-null float64 2 Weather 970 non-null object 3 Traffic_Level 970 non-null object 4 Time_of_Day 970 non-null object 5 Vehicle_Type 1000 non-null object 6 Preparation_Time_min 1000 non-null int64 7 Courier_Experience_yrs 970 non-null float64 8 Delivery_Time_min 1000 non-null int64 dtypes: float64(2), int64(3), object(4) memory usage: 70.4+ KB
# (rows, columns) of the frame as loaded
df.shape
(1000, 9)
# check for duplicated rows (counts rows that exactly repeat an earlier row)
df.duplicated().sum()
0
# Frequency of each traffic level (missing values are excluded by default)
df['Traffic_Level'].value_counts()
Traffic_Level Medium 390 Low 383 High 197 Name: count, dtype: int64
# Frequency of each weather condition (missing values are excluded by default)
df['Weather'].value_counts()
Weather Clear 470 Rainy 204 Foggy 103 Snowy 97 Windy 96 Name: count, dtype: int64
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns
df.describe()
| Order_ID | Distance_km | Preparation_Time_min | Courier_Experience_yrs | Delivery_Time_min | |
|---|---|---|---|---|---|
| count | 1000.000000 | 1000.000000 | 1000.000000 | 970.000000 | 1000.000000 |
| mean | 500.500000 | 10.059970 | 16.982000 | 4.579381 | 56.732000 |
| std | 288.819436 | 5.696656 | 7.204553 | 2.914394 | 22.070915 |
| min | 1.000000 | 0.590000 | 5.000000 | 0.000000 | 8.000000 |
| 25% | 250.750000 | 5.105000 | 11.000000 | 2.000000 | 41.000000 |
| 50% | 500.500000 | 10.190000 | 17.000000 | 5.000000 | 55.500000 |
| 75% | 750.250000 | 15.017500 | 23.000000 | 7.000000 | 71.000000 |
| max | 1000.000000 | 19.990000 | 29.000000 | 9.000000 | 153.000000 |
# Number of missing values in each column
df.isna().sum()
Order_ID 0 Distance_km 0 Weather 30 Traffic_Level 30 Time_of_Day 30 Vehicle_Type 0 Preparation_Time_min 0 Courier_Experience_yrs 30 Delivery_Time_min 0 dtype: int64
# dropping NaNs: remove, in place, every row with a missing value in any column
df.dropna(inplace = True)
# confirming that no missing values remain
df.isna().sum()
Order_ID 0 Distance_km 0 Weather 0 Traffic_Level 0 Time_of_Day 0 Vehicle_Type 0 Preparation_Time_min 0 Courier_Experience_yrs 0 Delivery_Time_min 0 dtype: int64
# Re-check the duplicate-row count after dropping rows with missing values
df.duplicated().sum()
0
# Missing-value count per column again (isnull is an alias of isna)
df.isnull().sum()
Order_ID 0 Distance_km 0 Weather 0 Traffic_Level 0 Time_of_Day 0 Vehicle_Type 0 Preparation_Time_min 0 Courier_Experience_yrs 0 Delivery_Time_min 0 dtype: int64
# Column dtypes at this point; the text columns are still plain `object`
df.dtypes
Order_ID int64 Distance_km float64 Weather object Traffic_Level object Time_of_Day object Vehicle_Type object Preparation_Time_min int64 Courier_Experience_yrs float64 Delivery_Time_min int64 dtype: object
# Normalise the text labels (trim surrounding whitespace, consistent
# capitalisation) and store each column as a pandas categorical.
categorical_columns = ['Weather', 'Traffic_Level', 'Time_of_Day', 'Vehicle_Type']
for column in categorical_columns:
    cleaned = df[column].str.strip().str.lower().str.title()
    df[column] = cleaned.astype('category')
# viewing dataset info
df.info()
<class 'pandas.core.frame.DataFrame'> Index: 883 entries, 0 to 999 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Order_ID 883 non-null int64 1 Distance_km 883 non-null float64 2 Weather 883 non-null category 3 Traffic_Level 883 non-null category 4 Time_of_Day 883 non-null category 5 Vehicle_Type 883 non-null category 6 Preparation_Time_min 883 non-null int64 7 Courier_Experience_yrs 883 non-null float64 8 Delivery_Time_min 883 non-null int64 dtypes: category(4), float64(2), int64(3) memory usage: 45.5 KB
# Pairwise correlation matrix, restricted to the numeric columns
df.corr(numeric_only=True)
| Order_ID | Distance_km | Preparation_Time_min | Courier_Experience_yrs | Delivery_Time_min | |
|---|---|---|---|---|---|
| Order_ID | 1.000000 | -0.010197 | -0.029740 | 0.003005 | -0.012632 |
| Distance_km | -0.010197 | 1.000000 | -0.025518 | -0.003372 | 0.783226 |
| Preparation_Time_min | -0.029740 | -0.025518 | 1.000000 | -0.030776 | 0.296529 |
| Courier_Experience_yrs | 0.003005 | -0.003372 | -0.030776 | 1.000000 | -0.076787 |
| Delivery_Time_min | -0.012632 | 0.783226 | 0.296529 | -0.076787 | 1.000000 |
# Preview the first rows again after the cleaning steps above
df.head()
| Order_ID | Distance_km | Weather | Traffic_Level | Time_of_Day | Vehicle_Type | Preparation_Time_min | Courier_Experience_yrs | Delivery_Time_min | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 522 | 7.93 | Windy | Low | Afternoon | Scooter | 12 | 1.0 | 43 |
| 1 | 738 | 16.42 | Clear | Medium | Evening | Bike | 20 | 2.0 | 84 |
| 2 | 741 | 9.52 | Foggy | Low | Night | Scooter | 28 | 1.0 | 59 |
| 3 | 661 | 7.44 | Rainy | Medium | Afternoon | Scooter | 5 | 1.0 | 37 |
| 4 | 412 | 19.03 | Clear | Low | Morning | Bike | 16 | 5.0 | 68 |
import seaborn as sns
import matplotlib.pyplot as plt

# setting up seaborn configurations
sns.set_style('whitegrid')

# Mean delivery time for each weather condition, drawn on an explicit
# 10x5 inch figure.
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(data=df, x='Weather', y='Delivery_Time_min', ax=ax)
ax.set_title("Impact of Weather on Delivery Time")
plt.show()
C:\ProgramData\Anaconda3\lib\site-packages\seaborn\categorical.py:253: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. grouped_vals = vals.groupby(grouper)
# Mean delivery time for each traffic level.
traffic_ax = sns.barplot(data=df, x='Traffic_Level', y='Delivery_Time_min')
traffic_ax.set_title("Traffic vs Delivery Time")
plt.show()
C:\ProgramData\Anaconda3\lib\site-packages\seaborn\categorical.py:253: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. grouped_vals = vals.groupby(grouper)
# Mean delivery time for each vehicle type.
sns.barplot(x='Vehicle_Type', y='Delivery_Time_min', data=df)
plt.title("Delivery Time by Vehicle Type")
# Render and finish the figure here, like the other plotting cells do.
# Without this the cell echoes the Text object returned by plt.title, and
# outside a notebook the next barplot would be drawn onto these same axes.
plt.show()
C:\ProgramData\Anaconda3\lib\site-packages\seaborn\categorical.py:253: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. grouped_vals = vals.groupby(grouper)
Text(0.5, 1.0, 'Delivery Time by Vehicle Type')
import plotly.express as px

# Group and round the average: mean delivery time per experience level,
# rounded to 2 decimals and ordered by years of experience.
df_mean = (
    df.groupby('Courier_Experience_yrs')['Delivery_Time_min']
    .mean()
    .reset_index()
    .round(2)
    .sort_values(by='Courier_Experience_yrs')
)

# Horizontal bars: one bar per experience level, labelled and coloured by
# the average delivery time.
fig = px.bar(
    df_mean,
    x='Delivery_Time_min',
    y='Courier_Experience_yrs',
    orientation='h',
    text='Delivery_Time_min',
    color='Delivery_Time_min',
    color_continuous_scale='Blues',
)

fig.update_layout(
    title="Experience vs Delivery Time",
    xaxis_title="Avg Delivery Time (min)",
    yaxis_title="Courier Experience (Years)",
    # One tick per whole year, starting at 0.
    yaxis={'tickmode': 'linear', 'tick0': 0, 'dtick': 1},
)
fig.show()
import seaborn as sns
import matplotlib.pyplot as plt

# Mean delivery time for each part of the day, with explicit axis labels.
tod_ax = sns.barplot(data=df, x='Time_of_Day', y='Delivery_Time_min')
tod_ax.set_title("Time_of_Day Impact on Delivery Time")
tod_ax.set_ylabel("Delivery_Time_min")
tod_ax.set_xlabel("Time_of_Day")
plt.show()
C:\ProgramData\Anaconda3\lib\site-packages\seaborn\categorical.py:253: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Rank the input features by how much a random forest relies on them when
# predicting Delivery_Time_min.

# 1. Copy Data (so the integer encoding below does not modify `df`)
df_model = df.copy()

# 2. Encode categorical columns as integer codes, one fresh encoder per column
cat_cols = df_model.select_dtypes(include=['category']).columns
for col in cat_cols:
    df_model[col] = LabelEncoder().fit_transform(df_model[col])

# 3. Features and Target (Order_ID is an identifier, not a predictor)
X = df_model.drop(['Order_ID', 'Delivery_Time_min'], axis=1)
y = df_model['Delivery_Time_min']

# 4. Train-Test Split (the held-out 20% is not evaluated in this cell)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5. Train model
# Seed the forest as well as the split: without random_state the bootstrap
# samples differ on every run, so the importances would not be reproducible.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# 6. Feature Importance
importances = model.feature_importances_
features = X.columns

# 7. Visualize, most important feature first
importance_df = pd.DataFrame(
    {'Feature': features, 'Importance': importances}
).sort_values(by='Importance', ascending=False)
sns.barplot(x='Importance', y='Feature', data=importance_df)
plt.title("Feature Importance for Delivery Time Prediction")
plt.show()
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Train a random forest on the cleaned data and estimate the delivery time
# for one hand-written order.
df_ml = df.copy()

# Encode all categorical features
cat_cols = df_ml.select_dtypes(include='category').columns
le_dict = {}  # Save encoders to use later (maps column name -> fitted encoder)

for col in cat_cols:
    le = LabelEncoder()
    df_ml[col] = le.fit_transform(df_ml[col])
    le_dict[col] = le

# Features and target (Order_ID is an identifier, not a predictor)
X = df_ml.drop(['Order_ID', 'Delivery_Time_min'], axis=1)
y = df_ml['Delivery_Time_min']

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
# Seed the forest as well as the split: without random_state the printed
# estimate changes from run to run.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# values(optional): categorical entries go through the same fitted encoders
# that were used on the training data
custom_input = {
    'Distance_km': 10,
    'Weather': le_dict['Weather'].transform(['Windy'])[0],
    'Traffic_Level': le_dict['Traffic_Level'].transform(['Medium'])[0],
    'Time_of_Day': le_dict['Time_of_Day'].transform(['Morning'])[0],
    'Vehicle_Type': le_dict['Vehicle_Type'].transform(['Scooter'])[0],
    'Preparation_Time_min': 20,
    'Courier_Experience_yrs': 8.0
}

# Convert to DataFrame, with the columns in the order the model was trained
# on; a missing feature raises KeyError here instead of being misaligned.
input_df = pd.DataFrame([custom_input])[X.columns.tolist()]

predicted_time = model.predict(input_df)
print(f"📦 Estimated Delivery Time: {predicted_time[0]:.2f} minutes")
📦 Estimated Delivery Time: 60.36 minutes